# -*- coding:utf8 -*-
# !/usr/bin/env python

"""
#全国企业信用信息公示系统（贵州）
#维护黄羽
"""

import re
from scpy.logger import get_logger
import copy
import sys
import requests
import time
import traceback
import json
# import hashlib
from utils import kill_captcha
# from table import index, report_index, table_clean, parse_time
import gz_trans_dict as TR
import sd_template_dict as TE
import sd_format as FO
from gz_remove_noise import remove_noise

# Python 2 only: the interpreter removes sys.setdefaultencoding during startup,
# so sys is reloaded to get it back and the process-wide implicit
# str <-> unicode codec is switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Module-level logger created by scpy.logger from this file's path.
logger = get_logger(__file__)

# User-Agent header value placed on the request sessions created in this module.
UA = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36'


def download_captcha_kill(companyName):
    """Solve the search captcha and look ``companyName`` up on the Guizhou site.

    The captcha image is downloaded three times within one session; the three
    downloads are handed to ``remove_noise`` and its output to
    ``kill_captcha`` (presumably the same code rendered with different noise
    -- confirm against ``gz_remove_noise``). The recognised text is then
    posted together with the company name to the search endpoint.

    :param companyName: company name used as the search keyword (``q``).
    :return:
        * ``''``   -- the captcha answer is unusable, or the site response has
          no truthy ``successed`` flag (callers treat this as "retry");
        * ``None`` -- the search succeeded but returned no company;
        * ``dict`` -- ``{"req_data": <first search hit>, "cookie": <session id>}``.
    :raises Exception: when the captcha response carries no cookie, or when
        noise removal / the captcha service fails (the original error is
        re-raised). Network and JSON decoding errors propagate unchanged.
    """
    req = requests.session()
    req.headers = {
        'Referer': 'http://gsxt.gzgs.gov.cn/list.jsp',
        'User-Agent': UA,
    }
    img_response = req.get(
        'http://gsxt.gzgs.gov.cn/search!generateCode.shtml?validTag=searchImageCode&type=2&1467623176794', timeout=8)

    # get_company_info() replays this value as "JSESSIONID=<value>", so look
    # the cookie up by that name; fall back to the first cookie in the jar
    # (the previous behaviour) when the server names it differently.
    cookie = img_response.cookies.get('JSESSIONID')
    if not cookie:
        cookie_items = img_response.cookies.items()
        if cookie_items:
            cookie = cookie_items[0][1]
    if not cookie:
        raise Exception("cookie 获取失败")

    img_1 = img_response.content
    time.sleep(2)
    img_2 = req.get('http://gsxt.gzgs.gov.cn/search!generateCode.shtml?validTag=searchImageCode&type=2&1461730245084',
                    timeout=8).content
    img_3 = req.get('http://gsxt.gzgs.gov.cn/search!generateCode.shtml?validTag=searchImageCode&type=2&1461730245084',
                    timeout=8).content

    try:
        img_res = remove_noise(img_1, img_2, img_3)
        captcha = kill_captcha(img_res, 'gz', 'jpg')
    except Exception as e:
        logger.error("破解验证码的服务，出现异常")
        logger.error(e)
        # Bare raise keeps the original traceback (``raise e`` resets it on Python 2).
        raise

    # Empty, over-long or sentinel answers mean the captcha was not recognised.
    if not captcha or len(captcha) > 100 or str(captcha) in ['None', 'wrong']:
        return ''

    check_url = 'http://gsxt.gzgs.gov.cn/query!searchSczt.shtml'
    check_data = {'q': companyName, 'validCode': captcha, }
    check_res = req.post(url=check_url, data=check_data, timeout=8).content
    check_res = json.loads(check_res)
    if not check_res.get("successed"):
        # Reported by the site when the request was rejected (e.g. wrong captcha).
        logger.info("网站返回:%s" % check_res)
        return ''

    res_data = check_res.get("data")
    if not res_data:
        logger.info("公司不存在,输入为:%s" % companyName)
        return None

    # Only the first search hit is used.
    return {"req_data": res_data[0], "cookie": cookie}


def get_company_info(res_data):
    """Download every raw section of a company's record from the Guizhou site.

    :param res_data: dict produced by ``download_captcha_kill``:
        ``{"req_data": <search hit containing 'nbxh' and 'ztlx'>, "cookie": <session id>}``.
    :return: dict with keys ``province``, ``type`` (``"2"`` enterprise,
        ``"1"`` individual business), ``html``, ``keyword``, ``companyName``,
        ``json`` (section name -> raw response body, ``''`` when that request
        failed; ``share_detail`` is always an empty list) and ``yearList``
        (one dict of raw response bodies per annual report).
    :raises Exception: ``"req_data error"`` when ``res_data``, its
        ``req_data`` or its ``cookie`` is missing/empty. Errors of the
        mandatory basic-information request propagate unchanged; all other
        requests are best-effort.
    """
    if not res_data:
        raise Exception("req_data error")

    company_post_data = res_data.get("req_data", "")
    cookie = res_data.get("cookie", "")
    if not company_post_data or not cookie:
        raise Exception("req_data error")

    raw_dict = {
        "province": "gz",
        "type": "2",
        "html": "",
        "yearList": [],
        "keyword": "",
        "companyName": "",
        "json": "",
    }

    req = requests.session()
    nbxh = company_post_data['nbxh']
    req.headers = {
        'Cookie': "JSESSIONID=%s" % cookie,
        'Referer': 'http://gsxt.gzgs.gov.cn/2016/gfgs/jbxx.jsp?k=%s' % nbxh,
        'User-Agent': UA,
    }

    def query(c, t, lsh=None):
        """Build the form payload for query codes ``c``/``t`` (plus ``lsh`` for annual reports)."""
        payload = {'c': c, 't': t, 'nbxh': nbxh}
        if lsh is not None:
            payload['lsh'] = lsh
        return payload

    # On the website "1" means enterprise and "2" means individual business.
    # The crawler keeps the opposite coding for compatibility with historical
    # data: "2" = enterprise, "1" = individual business. The two kinds expose
    # different fields and are parsed differently downstream.
    is_enterprise = company_post_data.get('ztlx') == "1"
    if is_enterprise:
        base_code, alter_code, person_code, year_list_code = (0, 5), (0, 3), (0, 8), (0, 13)
        year_head = [("base", 0, 14), ("website", 0, 15), ("assets", 0, 16)]
        raw_dict["type"] = "2"
    else:
        base_code, alter_code, person_code, year_list_code = (1, 1), (1, 2), (1, 3), (1, 8)
        year_head = [("base", 1, 4), ("website", 1, 7), ("assets", 1, 6)]
        raw_dict["type"] = "1"

    base_info_url = 'http://gsxt.gzgs.gov.cn/2016/gfgs/query!searchData.shtml'
    xk_url = 'http://gsxt.gzgs.gov.cn/2016/gfgs/query!searchOldData.shtml'
    year_info_url = "http://gsxt.gzgs.gov.cn/2016/gfgs/query!searchNbxx.shtml"

    raw_json_data = {}

    # Basic information is mandatory: a failure here is allowed to propagate.
    raw_json_data["base"] = req.post(url=base_info_url, data=query(*base_code), timeout=8).content

    # Optional sections, requested in the same order as before:
    # (result key, endpoint, c, t)
    optional_sections = [
        ("alter", base_info_url) + alter_code,      # change records
        ("share", base_info_url, 2, 3),             # shareholders
        ("person", base_info_url) + person_code,    # key personnel
        ("branch", base_info_url, 0, 9),            # branches
        ("liquidation", base_info_url, 0, 36),      # liquidation
        ("xk", xk_url, 0, 37),                      # administrative licences
        ("abnormal", base_info_url, 0, 33),         # abnormal operation
        ("check_message", base_info_url, 0, 35),    # spot checks
        ("share_detail_2", base_info_url, 0, 40),   # shareholders and contributions
        ("equity_change", base_info_url, 0, 23),    # equity changes
        ("xk_2", base_info_url, 0, 20),             # administrative licences (2)
        ("patent", base_info_url, 0, 21),           # IP pledge registrations
        ("case_info", base_info_url, 0, 22),        # administrative penalties
    ]
    for key, url, c, t in optional_sections:
        raw_json_data[key] = _post_or_default(req, url, query(c, t))

    # TODO: per-shareholder detail -- the new website no longer offers that endpoint.
    raw_json_data["share_detail"] = []

    # Annual-report index; any failure (request or decoding) yields no reports.
    raw_year_index = _post_or_default(req, base_info_url, query(*year_list_code))
    try:
        # ``or []`` covers a JSON ``"data": null`` answer.
        year_index = (json.loads(raw_year_index).get('data', []) or []) if raw_year_index else []
    except (ValueError, AttributeError):
        year_index = []

    year_sections = year_head + [
        ("invest", 0, 18),          # outbound investments
        ("investor", 0, 19),        # shareholders and contributions
        ("guarantee", 0, 24),       # guarantees provided
        ("equity_change", 0, 39),   # equity changes
        ("alter", 0, 41),           # modification records
    ]

    raw_year_list = []
    for item in year_index:
        lsh = item.get("lsh")
        year = item.get("nd", "")
        logger.info("正在获取%s年" % year)

        raw_year_dict = {"year": year}
        for key, c, t in year_sections:
            raw_year_dict[key] = _post_or_default(req, year_info_url, query(c, t, lsh))
        raw_year_list.append(raw_year_dict)

    raw_dict["json"] = raw_json_data
    raw_dict["yearList"] = raw_year_list

    return raw_dict


def _post_or_default(session, url, payload, default=''):
    """POST ``payload`` to ``url``; return the response body, or ``default`` if the request fails."""
    try:
        return session.post(url=url, data=payload, timeout=8).content
    except Exception as e:
        # Best-effort section: record the failure instead of hiding it, and
        # let KeyboardInterrupt/SystemExit through (a bare ``except`` did not).
        logger.error("request failed, url=%s, data=%s, error=%s" % (url, payload, e))
        return default


def extract_base_info(raw_dict):
    """Convert the raw registration sections into the normalised base dict.

    :param raw_dict: result of ``get_company_info``; ``raw_dict["json"]`` maps
        section names to raw JSON response bodies and ``raw_dict["type"]`` is
        ``"2"`` for enterprises (anything else selects the individual-business
        field mappings).
    :return: copy of ``TE.void_base_dict`` filled with ``basicList``,
        ``province``, ``shareHolderList``, ``alterList``, ``personList``,
        ``liquidationList``, ``abnormalOperation`` and ``checkMessage``, passed
        through ``FO.clean_all``.
    :raises Exception: ``"raw_dict 错误"`` when ``raw_dict`` or its ``json``
        entry is empty; ``"基本信息获取失败"`` when the basic-information
        section is missing or has no rows. Malformed JSON in a section
        propagates as the decoder's error.
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    raw_html = raw_dict.get("json", {})
    if not raw_html:
        raise Exception("raw_dict 错误")

    def section_rows(key):
        """Return the ``data`` rows of one raw section ([] if absent, empty or null)."""
        raw_section = raw_html.get(key)
        if not raw_section:
            return []
        return json.loads(raw_section).get("data", []) or []

    # Enterprises ("2") and individual businesses ("1") expose different
    # fields, so they need different translation tables.
    if raw_dict.get("type", "") == "2":
        tr_basic_dict = copy.deepcopy(TR.basic_dict)
        tr_person_dict = copy.deepcopy(TR.person_dict)
    else:
        tr_basic_dict = copy.deepcopy(TR.basic_dict_gt)
        tr_person_dict = copy.deepcopy(TR.person_dict_gt)
    res_base_dict = copy.deepcopy(TE.void_base_dict)

    # Basic information (mandatory); only the first row is used.
    raw_base_list = section_rows("base")
    if not raw_base_list:
        raise Exception("基本信息获取失败")
    res_base_dict["basicList"] = [FO.transform_dict(TE.basic_dict, tr_basic_dict, raw_base_list[0])]
    res_base_dict["province"] = "gz"

    # Shareholders with contribution details ("share_detail_2") ...
    detailed_holders = [
        FO.transform_dict(TE.shareHolder_dict, TR.shareHolder_dict_2, row)
        for row in section_rows("share_detail_2") if row
    ]
    # ... and the plain shareholder list ("share"), merged into one list.
    plain_holders = [
        FO.transform_dict(TE.shareHolder_dict, TR.shareHolder_dict, row)
        for row in section_rows("share") if row
    ]
    res_base_dict["shareHolderList"] = _merge_share_holders(detailed_holders, plain_holders)

    # Change records
    res_base_dict["alterList"] = [
        FO.transform_dict(TE.alter_dict, TR.alter_dict, row) for row in section_rows("alter")
    ]

    # Key personnel
    res_base_dict["personList"] = [
        FO.transform_dict(TE.person_dict, tr_person_dict, row) for row in section_rows("person")
    ]

    # TODO: branches -- the "branch" section is downloaded but not yet mapped
    # to "filiationList" (intended mapping: TE.filiation_dict / TR.filiation_dict).

    # TODO: liquidation -- the "liquidation" section is downloaded but has no
    # field mapping yet, so the list is always empty. The payload is not even
    # parsed, so a malformed body there cannot abort the extraction.
    res_base_dict["liquidationList"] = []

    # Abnormal-operation records
    res_base_dict["abnormalOperation"] = [
        FO.transform_dict(TE.abnormalOperation_dict, TR.abnormalOperation_dict, row)
        for row in section_rows("abnormal")
    ]

    # Spot checks
    res_base_dict["checkMessage"] = [
        FO.transform_dict(TE.checkMessage_dict, TR.checkMessage_dict, row)
        for row in section_rows("check_message")
    ]

    # Final cleaning pass
    res_base_dict = FO.clean_all(res_base_dict)

    return res_base_dict


def _merge_share_holders(detailed_holders, plain_holders):
    """Merge the detailed and plain shareholder lists into one list.

    * Every detailed entry is kept -- duplicates included, because their
      amounts may differ -- and receives the ``shareholderType`` of the first
      plain entry with the same non-empty name.
    * Plain entries whose name does not occur among the detailed ones are
      appended afterwards, once per name (they carry no details, so
      duplicates add nothing), in their original order.
    """
    first_plain_by_name = {}
    for holder in plain_holders:
        name = holder.get("shareholderName", "") or ""
        if name and name not in first_plain_by_name:
            first_plain_by_name[name] = holder

    merged = []
    seen_names = set()
    for holder in detailed_holders:
        name = holder.get("shareholderName", "") or ""
        match = first_plain_by_name.get(name) if name else None
        if match is not None:
            holder["shareholderType"] = match.get("shareholderType", "") or ""
        merged.append(holder)
        seen_names.add(name)

    for holder in plain_holders:
        name = holder.get("shareholderName", "") or ""
        if name and name not in seen_names:
            merged.append(holder)
            seen_names.add(name)

    return merged


def extract_year_info(raw_dict):
    """Convert the raw annual-report sections into normalised report dicts.

    :param raw_dict: result of ``get_company_info``; ``raw_dict["yearList"]``
        holds one dict of raw JSON response bodies per report and
        ``raw_dict["type"]`` is ``"2"`` for enterprises (anything else selects
        the individual-business field mappings).
    :return: list with one cleaned copy of ``TE.void_year_dict`` per report.
        Reports whose basic-information section is missing or has no rows are
        skipped.
    :raises Exception: ``"raw_dict 错误"`` when ``raw_dict`` is empty.
        Malformed JSON in a parsed section propagates as the decoder's error.
    """
    if not raw_dict:
        raise Exception("raw_dict 错误")

    def section_rows(year_entry, key):
        """Return the ``data`` rows of one raw section ([] if absent, empty or null)."""
        raw_section = year_entry.get(key, "")
        if not raw_section:
            return []
        return json.loads(raw_section).get("data", []) or []

    raw_year_list = raw_dict.get("yearList", [])
    if raw_dict.get("type", "") == "2":
        tr_base_info_dict = copy.deepcopy(TR.baseInfo_dict)
        tr_assets_info_dict = copy.deepcopy(TR.assetsInfo_dict)
    else:
        tr_base_info_dict = copy.deepcopy(TR.baseInfo_dict_gt)
        tr_assets_info_dict = copy.deepcopy(TR.assetsInfo_dict_gt)

    res_year_list = []
    for a_year in raw_year_list:
        res_year_dict = copy.deepcopy(TE.void_year_dict)
        res_year_dict["year"] = a_year.get("year", "")

        # Basic information -- a report without it is dropped entirely.
        base_rows = section_rows(a_year, "base")
        if not base_rows:
            continue
        for row in base_rows:
            # If several rows come back, the last one wins.
            res_year_dict["baseInfo"] = FO.transform_dict(TE.baseInfo_dict, tr_base_info_dict, row)

        # Website / online shop: first row only, {} when there is none.
        website_rows = section_rows(a_year, "website")
        if website_rows:
            res_year_dict["website"] = FO.transform_dict(TE.website_dict, TR.website_dict, website_rows[0])
        else:
            res_year_dict["website"] = {}

        # Asset status: the template default is kept when there are no rows;
        # otherwise the last row wins.
        for row in section_rows(a_year, "assets"):
            res_year_dict["assetsInfo"] = FO.transform_dict(TE.assetsInfo_dict, tr_assets_info_dict, row)

        # Outbound investments
        res_year_dict["entinvItemList"] = [
            FO.transform_dict(TE.entinvItem_dict, TR.year_entinvItem_dict, row)
            for row in section_rows(a_year, "invest")
        ]

        # Founders / shareholders and their contributions
        res_year_dict["investorInformations"] = [
            FO.transform_dict(TE.investorInformations_dict, TR.investorInformations_dict, row)
            for row in section_rows(a_year, "investor")
        ]

        # TODO: guarantees provided -- the "guarantee" section is downloaded
        # but has no field mapping yet. It is deliberately not parsed, so a
        # malformed body there cannot drop the whole report.

        # Equity changes
        res_year_dict["equityChangeInformations"] = [
            FO.transform_dict(TE.equityChangeInformations_dict, TR.equityChangeInformations_dict, row)
            for row in section_rows(a_year, "equity_change")
        ]

        # Modification records
        res_year_dict["changeRecords"] = [
            FO.transform_dict(TE.changeRecords_dict, TR.changeRecords_dict, row)
            for row in section_rows(a_year, "alter")
        ]

        # Final cleaning pass
        res_year_dict = FO.clean_all(res_year_dict)

        res_year_list.append(res_year_dict)

    return res_year_list


def search2(companyName, MAXTIME=40):
    """Search a company by name, solving the captcha with up to ``MAXTIME`` attempts.

    :param companyName: company name to search for.
    :param MAXTIME: maximum number of captcha attempts.
    :return: ``None`` when the site reports no such company; otherwise the
        tuple ``(raw_dict, asic_dict, gate_method)`` where ``raw_dict`` is the
        raw download, ``asic_dict`` the extracted record (``None`` if
        extraction failed) and ``gate_method`` the data needed to re-fetch the
        company through ``search3`` without another captcha.
    :raises Exception: when every captcha attempt failed; errors raised by
        ``download_captcha_kill`` or ``get_company_info`` propagate.
    """
    com_list = ''  # '' = captcha not solved yet, None = company does not exist
    remaining = MAXTIME
    while com_list == '' and remaining > 0:
        if remaining < MAXTIME:
            logger.error("重复破解验证码!当前设定重复破解次数为:%s, 剩余次数为:%s " % (MAXTIME, remaining))
        remaining -= 1
        try:
            com_list = download_captcha_kill(companyName)
        except Exception:
            traceback.print_exc()
            raise

    if com_list is None:
        # Checked after the loop as well, so a "not found" answer on the very
        # last attempt is reported as None instead of a "req_data error".
        return None
    if com_list == '':
        # Attempts exhausted without a usable captcha answer.
        raise Exception("多次破解验证码错误,当前设置次数为：%s" % MAXTIME)

    return _fetch_and_extract(com_list, companyName)


def search(companyName):
    """Search a company by name and return only the extracted record.

    :return: the ``asic_dict`` element of ``search2``'s result (``None`` if
        extraction failed), or ``None`` when the company does not exist.
    """
    res = search2(companyName)
    return res[1] if res else None


def search3(gate_method):
    """Re-fetch a company from a ``gate_method`` dict returned by an earlier search.

    :param gate_method: dict holding at least ``data`` (the value passed to
        ``get_company_info``) and optionally ``companyName``.
    :return: the tuple ``(raw_dict, asic_dict, gate_method)``, as for ``search2``.
    :raises Exception: when ``gate_method`` has no ``data`` key; errors raised
        by ``get_company_info`` propagate.
    """
    if 'data' not in gate_method:
        raise Exception("gate_method error, doesn't have `data` key")
    return _fetch_and_extract(gate_method.get('data'), gate_method.get('companyName', ''))


def _fetch_and_extract(com_list, companyName):
    """Download the company described by ``com_list`` and extract its record.

    Extraction is best-effort: if it fails, the error is logged and the raw
    download is still returned with ``None`` in place of the extracted record
    and ``companyName`` as the fallback company name.
    """
    raw_dict = get_company_info(com_list)
    try:
        asic_dict = extract_base_info(raw_dict)
        year_list = extract_year_info(raw_dict)
        # Prefer the name published by the site; fall back to the search keyword.
        company_name = asic_dict['basicList'][0].get('enterpriseName', '') or companyName
        asic_dict['yearReportList'] = year_list
    except Exception as e:
        traceback.print_exc()
        logger.info(e)
        asic_dict = None
        company_name = companyName

    raw_dict['companyName'] = company_name
    gate_method = {
        'url': 'http://gsxt.gzgs.gov.cn/',
        'method': 'post',
        'province': 'gz',
        'companyName': company_name,
        'data': com_list,
    }
    return raw_dict, asic_dict, gate_method


# Manual smoke test: run one search against the live site and dump the result.
# The commented-out names below are alternative sample companies; uncomment
# exactly one assignment to try it.
if __name__ == "__main__":
    # companyName = '贵州茅台酒股份有限公司'
    # companyName = '贵州中品大数据信用管理科技有限公司'
    # companyName = '贵州金立信商贸有限公司'
    # companyName = '道真凯迪绿色能源开发有限公司'
    # companyName = '贵州水务股份有限公司'
    # companyName = '贵州光明房地产开发有限责任公司'
    # companyName = '东屹建设集团股份有限公司'
    # companyName = '贵州斯瑞医药有限责任公司'
    # companyName = '瓮安县上场坪新井煤矿'
    # companyName = '中国联合网络通信有限公司江口县分公司'
    # companyName = '黔南州黔都出租车服务有限公司'
    # companyName = '贵州益佰制药股份有限公司'
    # companyName = '贵州董酒股份有限公司'
    # companyName = '七星关区八寨镇吴雄食品店'
    # companyName = '思南县乐兴金银花种植基地'
    # companyName = '贵阳视恒创新科技有限公司龙里分公司'
    companyName = '贵州省平坝县弘德实业有限责任公司'
    # companyName = '思南县长坝乡一鑫采石场'

    # Sample companies for the abnormal-operation section
    # companyName = '贵州京顺鑫房地产开发有限公司'
    # companyName = '遵义市红花岗区后坝石粉厂'

    # Sample companies for the spot-check section
    # companyName = '都匀昊域投资有限公司'
    # companyName = '黔南州华欣工贸有限公司华欣饮料厂'

    # res_data = download_captcha_kill(companyName)
    # get_company_info(res_data)
    # search2 returns None or the (raw_dict, asic_dict, gate_method) tuple.
    res = search2(companyName)
    print json.dumps(res, indent=4, ensure_ascii=False)
